Now it is time to read in the data and get into a workable format for our problem

# read in the data
df <- read.csv('Data/NEISS/sport_category_final.csv')

# create a contigency table 
orig_table <- table(df$prod1,df$body_part)

# create a datafame from the contigency table (at least one that is intuitive)
tm<-as.data.frame.matrix(orig_table) # tm stands for table matrix

and some further cleanup. This time, we need to ensure that all of the classifications of the original data belong to the same categories of that in the Berger and Trinkaus paper.

head_neck <- tm$face + tm$neck + tm$head
shoulder_arm <- tm$`upper arm` + tm$`lower arm` + tm$shoulder + tm$elbow
hand <- tm$hand + tm$finger + tm$wrist
pelvis <- tm$`pubic region` + tm$hip 
leg <- tm$knee + tm$`lower leg` + tm$`upper leg`
foot <- tm$foot + tm$toe + tm$ankle
trunk <- tm$`upper trunk` + tm$back

Now let’s wrap this all into a final dataframe that we will use throughout the rest of this analysis.

final<-as.data.frame(cbind(head_neck,shoulder_arm,hand,pelvis,leg,foot,trunk))
rownames(final)<-rownames(tm) # index will be the activity name

Grabbing the values off of the Berger and Trinkaus paper is the first task

sample1<-c(8,4,7,1,1,3,3) # total sample
sample2<-c(7,4,7,1,0,3,1) # without djd
sample3<-c(6,4,5,1,1,3,1) # without shandidar 1
sample4<-c(5,4,5,1,0,3,0) # without djd or shandidar 1

Now it is time to run the actual analysis. We are going to create some tables the involve running a chi square test on all of the samples against all of the activities as well as calculating Cramer’s V.

# total neanderthal sample
neander_total_stat_table<-t(apply(final,1,function(x) {
  two_rows    =   cbind(sample1,x)                      # compare each row with neanderthal sample
  cramer_stat =   CramerV(two_rows,conf.level=0.90)     # perform cramer's v and find confidence intervals
  chi2test    =   chisq.test(two_rows)                  # perform chi square
  chi2_cols   =   c(unname(chi2test$statistic), chi2test$p.value)   # grab chi square and p-value
  frame       =   cbind(cramer_stat,chi2_cols)          # bind cramer's v, conf intervals, chi square and p-value together
  final_frame =   frame[-c(6)]                          # drop stupid extra chi square column
  names(final_frame) = c("cramer's v", "l.ci", "u.ci", "chi square", "p-value")   # name columns
  return(final_frame)                                   # return frame
}))

final2<-final[-c(6,9),]

neander_wo_djd_stat_table<-t(apply(final2,1,function(x) {
  two_rows    =   cbind(sample2,x)
  cramer_stat =   CramerV(two_rows,conf.level=0.90)
  chi2test    =   chisq.test(two_rows)
  chi2_cols   =   c(unname(chi2test$statistic), chi2test$p.value)
  frame       =   cbind(cramer_stat,chi2_cols)
  final_frame =   frame[-c(6)]
  names(final_frame) = c("cramer's v", "l.ci", "u.ci", "chi square", "p-value")
  return(final_frame)
}))

# sample without shandidar
neander_wo_shan_stat_table<-t(apply(final,1,function(x) {
  two_rows    =   cbind(sample3,x)
  cramer_stat =   CramerV(two_rows,conf.level=0.90)
  chi2test    =   chisq.test(two_rows)
  chi2_cols   =   c(unname(chi2test$statistic), chi2test$p.value)
  frame       =   cbind(cramer_stat,chi2_cols)
  final_frame =   frame[-c(6)]
  names(final_frame) = c("cramer's v", "l.ci", "u.ci", "chi square", "p-value")
  return(final_frame)
}))


# sample without shandidar or djd        

final3<-final[-c(5,6,9,14,26,54,70),]     

neander_wo_djd_shan_stat_table<-t(apply(final3,1,function(x) {     
  two_rows    =   cbind(sample4,x)
  cramer_stat =   CramerV(two_rows,conf.level=0.90)
  chi2test    =   chisq.test(two_rows)
  chi2_cols   =   c(unname(chi2test$statistic), chi2test$p.value)
  frame       =   cbind(cramer_stat,chi2_cols)
  final_frame =   frame[-c(6)]
  names(final_frame) = c("cramer's v", "l.ci", "u.ci", "chi square", "p-value")
  return(final_frame)
}))

Create a function to clean up some vital information about the Chi Square test

chi2cleanup<-function(table){
  
  # read in one of the chi square tables (neander_total_stat_table, neander_wo_djd_stat_table, neander_wo_shan_stat_table or neander_wo_djd_shan_stat_table)
  frame<-as.data.frame(table,row.names = rownames(table))
  names(frame) = c("cramer's v", "l.ci", "u.ci", "chi square", "p-value")
  fin = na.omit(frame) 
  
  # create two callable objects, 1) the rows that have NAs and 2) the final data frame for some final manipulation
  c<- list(
    rowna=frame[is.nan(frame$`chi square`),],   # $rowna 
    final=fin,                        # $final
    similar=fin[fin$`p-value` > .05,] # $similar (activities that are similar, P > 0.05)
  )
  return(c)
}

Analysis

# name cleaned up chi square tables
n_tot<-chi2cleanup(neander_total_stat_table)  # neanderthal total
n_djd<-chi2cleanup(neander_wo_djd_stat_table) # neanderthal w/o djd
n_s<-chi2cleanup(neander_wo_shan_stat_table)   # neanderthal w/o shan
n_sd<-chi2cleanup(neander_wo_djd_shan_stat_table) # neanderthal w/o shan or djd

Find similar activities per sample

n_tot$similar   # neanderthal total
##                            cramer's v l.ci      u.ci chi square    p-value
## altar                       0.2293476    0 0.2795863   3.576823 0.73372229
## banisters                   0.4265850    0 0.5519008  11.100461 0.08532075
## benches                     0.2781826    0 0.3621662   8.976722 0.17489146
## bleachers                   0.2658938    0 0.3441546  10.958421 0.08967001
## diving board                0.2616941    0 0.3413494   7.396248 0.28575071
## flying discs or boomerangs  0.2944192    0 0.3805758  11.355433 0.07799320
## golf                        0.2181053    0 0.2815384  11.749776 0.06778954
## golf carts                  0.2179176    0 0.2829501   9.972497 0.12581503
## lawn chair                  0.3791534    0 0.4912861  10.638039 0.10022845
## loading docks               0.4014090    0 0.5178498  11.923561 0.06369594
## poles                       0.3553561    0 0.4597138  11.112463 0.08496219
## roller hockey               0.4022535    0 0.5229394   9.546665 0.14508720
## snow tubing                 0.3946921    0 0.5112343  10.748946 0.09645243
## swimsuit                    0.2165592    0 0.2824357   7.175379 0.30493252
## water tubing                0.1667062    0 0.1911635   2.973634 0.81214870
## windsurfing                 0.2221475    0 0.2859673  12.534779 0.05104817
n_djd$similar   # neanderthal w/o djd
##                            cramer's v l.ci       u.ci chi square
## altar                      0.27047673    0 0.34528330   4.682090
## baseball                   0.04624864    0 0.06030432   6.979352
## benches                    0.29226244    0 0.37993459   9.566742
## bleachers                  0.27244015    0 0.35234500  11.207769
## diving board               0.32441635    0 0.41993277  10.945581
## flying discs or boomerangs 0.30639724    0 0.39527697  11.922667
## golf carts                 0.24203273    0 0.31208125  12.067448
## paddle ball                0.36939368    0 0.47797835  11.052587
## poles                      0.31902142    0 0.41570805   8.549072
## roller hockey              0.38828687    0 0.50618416   8.292168
## swimsuit                   0.25182983    0 0.32746768   9.449321
## water tubing               0.23732547    0 0.30787081   5.801308
##                               p-value
## altar                      0.58518450
## baseball                   0.32276095
## benches                    0.14412345
## bleachers                  0.08216346
## diving board               0.09007295
## flying discs or boomerangs 0.06371642
## golf carts                 0.06048094
## paddle ball                0.08676471
## poles                      0.20057074
## roller hockey              0.21747021
## swimsuit                   0.14984055
## water tubing               0.44581190
n_s$similar     # neanderthal w/o shan
##                                cramer's v l.ci       u.ci chi square
## altar                          0.17836929    0 0.13613673   1.972567
## banisters                      0.45072369    0 0.58298172  11.173352
## baseball                       0.04605026    0 0.06003702   6.915363
## benches                        0.23624442    0 0.30717896   6.139257
## bleachers                      0.19842399    0 0.25753648   5.866440
## cheerleading                   0.17192926    0 0.22222508  11.380472
## diving board                   0.26433150    0 0.34472777   7.126857
## flying discs or boomerangs     0.23830238    0 0.31077076   7.098503
## golf                           0.17622636    0 0.22986673   7.484431
## golf carts                     0.18065056    0 0.23538386   6.657463
## lawn chair                     0.39038559    0 0.50628131  10.363262
## loading docks                  0.35299802    0 0.46004815   8.473317
## mountain/all-terrain biking    0.21713915    0 0.28141832  10.561468
## paddle ball                    0.35178829    0 0.45703598   9.776645
## patios/flooring                0.23899324    0 0.31070827   9.538668
## poles                          0.29146887    0 0.38003710   6.966236
## roller hockey                  0.33356498    0 0.43303142   5.897077
## snow tubing                    0.38226806    0 0.49740629   9.206119
## swimming pools (not specified) 0.23288837    0 0.30248709   9.871133
## swimsuit                       0.21242265    0 0.27676231   6.633137
## water tubing                   0.16976590    0 0.19281992   2.910867
## windsurfing                    0.19929201    0 0.25885982   9.849892
##                                   p-value
## altar                          0.92220419
## banisters                      0.08316429
## baseball                       0.32874544
## benches                        0.40777277
## bleachers                      0.43831681
## cheerleading                   0.07730557
## diving board                   0.30927482
## flying discs or boomerangs     0.31183373
## golf                           0.27835805
## golf carts                     0.35368900
## lawn chair                     0.11016446
## loading docks                  0.20543589
## mountain/all-terrain biking    0.10291280
## paddle ball                    0.13437860
## patios/flooring                0.14547269
## poles                          0.32398093
## roller hockey                  0.43481796
## snow tubing                    0.16231362
## swimming pools (not specified) 0.13018486
## swimsuit                       0.35610991
## water tubing                   0.81994803
## windsurfing                    0.13111741
n_sd$similar    # neanderthal w/o shan or djd
##                             cramer's v l.ci       u.ci chi square
## altar                       0.27044152    0 0.34172934   4.315178
## baseball                    0.04358653    0 0.05668606   6.189503
## benches                     0.27497775    0 0.35856059   8.090566
## bleachers                   0.23346681    0 0.30446467   7.957986
## cheerleading                0.17005634    0 0.22004504  11.047118
## diving board                0.34121391    0 0.44080629  11.526266
## flying discs or boomerangs  0.27735819    0 0.36072865   9.385163
## golf                        0.20341819    0 0.26422887   9.848193
## golf carts                  0.21722266    0 0.28244575   9.484323
## lawn chair                  0.42649705    0 0.55041710  11.823483
## loading docks               0.40086369    0 0.51973802  10.444960
## mountain/all-terrain biking 0.23074082    0 0.29783784  11.766333
## paddle ball                 0.36197358    0 0.47001334   9.957890
## patios/flooring             0.24713935    0 0.32085759  10.016769
## poles                       0.30452592    0 0.39721855   7.326147
## snow tubing                 0.45335991    0 0.58401200  12.332112
## swimsuit                    0.26351380    0 0.34212870   9.999291
## water tubing                0.25344844    0 0.32981028   6.295139
##                                p-value
## altar                       0.63410647
## baseball                    0.40230034
## benches                     0.23154305
## bleachers                   0.24119759
## cheerleading                0.08693106
## diving board                0.07341106
## flying discs or boomerangs  0.15304725
## golf                        0.13119225
## golf carts                  0.14811598
## lawn chair                  0.06602455
## loading docks               0.10712155
## mountain/all-terrain biking 0.06738931
## paddle ball                 0.12643658
## patios/flooring             0.12394763
## poles                       0.29173540
## snow tubing                 0.05495688
## swimsuit                    0.12468188
## water tubing                0.39095341

Lets look at what Rodeo Riders look like in comparison to the Neanderthal samples

n_tot$final['rodeo',]   # neanderthal total
##       cramer's v     l.ci      u.ci chi square     p-value
## rodeo  0.2198114 0.113429 0.2732467   23.62703 0.000611504
n_djd$final['rodeo',]   # neanderthal w/o djd
##       cramer's v      l.ci      u.ci chi square     p-value
## rodeo  0.2141958 0.1057345 0.2672423   22.25172 0.001090031
n_s$final['rodeo',]     # neanderthal w/o shan
##       cramer's v       l.ci      u.ci chi square    p-value
## rodeo  0.1858431 0.06622844 0.2358073   16.68168 0.01052714
n_sd$final['rodeo',]    # neanderthal w/o shan or djd
##       cramer's v       l.ci      u.ci chi square    p-value
## rodeo  0.1850306 0.06424588 0.2349721   16.43343 0.01160729
similarSelector<-function(frame,final_frame,neander_sample){ 
  
  require(reshape)
  require(plyr)
  indices=rownames(frame$similar)               # find rows to set as indices
  new_rows = final_frame[indices,]              # map those to original contigency table
  
  sample =append(c("neander","rodeo"),rownames(new_rows))  # add a new vector to use a "sample" column
  
  rodeo = final_frame['rodeo',] # add rodeo riders
  
  joined_rows = rbind(neander_sample,rodeo,new_rows) # join rows from neanderthal sample to new dataframe
  
  props = prop.table(as.table(as.matrix(joined_rows)),1)
  
  props<-as.data.frame.matrix(props)
  
  joined_cols = cbind(sample,props)       # add the "sample" column
  
  rownames(joined_cols) = NULL                  # remove the row indices 
      
  melted <- melt(joined_cols, id=(c("sample"))) # transpose contigency table
  
  return(melted)                
}
# an example of how similarSelector works
simToNeanderTotal<-similarSelector(n_tot,final,sample1)
simToNeanderWOdjd<-similarSelector(n_djd,final,sample2)
simToNeanderWOShan<-similarSelector(n_s,final,sample3)
simToNeanderWOdjdOrShan<-similarSelector(n_sd,final,sample4)

Find jus the neanderthals in the above dataframes

# extract just the neanderthal row for emphasis in plots
n2<-simToNeanderTotal[simToNeanderTotal$sample=='neander',]
d2<-simToNeanderWOdjd[simToNeanderWOdjd$sample=='neander',]
s2<-simToNeanderWOShan[simToNeanderWOShan$sample=='neander',]
sd<-simToNeanderWOdjdOrShan[simToNeanderWOdjdOrShan$sample=='neander',]

# extract just the rodeo riders for emphasis in plots
n3<-simToNeanderTotal[simToNeanderTotal$sample=='rodeo',]
d3<-simToNeanderWOdjd[simToNeanderWOdjd$sample=='rodeo',]
s3<-simToNeanderWOShan[simToNeanderWOShan$sample=='rodeo',]
sd3<-simToNeanderWOdjdOrShan[simToNeanderWOdjdOrShan$sample=='rodeo',]

Plot those that are similar

plotMaker<-function(total_frame,neander_frame,rodeo_frame){
  ggplot(data=total_frame, # this needs to be one of the dataframes directly above
         aes(x=factor(variable), y=value, 
             group=sample,
             color=sample)) + 
    geom_line() + 
    geom_point() +
    geom_point(data=neander_frame,aes(x=factor(variable), y=value, 
                                      group=sample, size = 4))+
    geom_line(data=neander_frame,aes(x=factor(variable), y=value, 
                          group=sample, size = 2))+ 
    geom_point(data=rodeo_frame,aes(x=factor(variable), y=value, 
                                      group=sample, size = 4))+
    geom_line(data=rodeo_frame,aes(x=factor(variable), y=value, 
                                     group=sample, size = 2))+ 
    scale_x_discrete("Proportion") +
    scale_y_continuous("Body Part")+
    guides(size=FALSE)
}

Plots

The total sample

plotMaker(simToNeanderTotal,n2,n3)

The sample with out djd

plotMaker(simToNeanderWOdjd,d2,d3)

The sample without Shandidar

plotMaker(simToNeanderWOShan,s2,s3)

The sample without djd or Shan

plotMaker(simToNeanderWOdjdOrShan,sd,sd3)